# Copyright (c) HySoP 2011-2024
#
# This file is part of HySoP software.
# See "https://particle_methods.gricad-pages.univ-grenoble-alpes.fr/hysop-doc/"
# for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from contextlib import contextmanager
import numpy as np
from hysop.tools.htypes import check_instance
from hysop.constants import Backend
from hysop.backend.device.codegen.base.opencl_codegen import OpenClCodeGenerator
from hysop.backend.device.codegen.base.function_codegen import FunctionBase
from hysop.backend.device.codegen.base.utils import VarDict, ArgDict, WriteOnceDict
from hysop.backend.device.opencl.opencl_types import OpenClTypeGen
from hysop.backend.device.codegen.base.variables import (
CodegenVariable,
CodegenVectorClBuiltin,
CodegenVectorClBuiltinFunc,
)
# Kernel function base: a FunctionBase with a "void" output that is never inlined.
class KernelBase(FunctionBase):
    """Function generator preconfigured for kernel entry points.

    The parent initialiser is always called with ``output="void"`` and
    ``inline=False``; the kernel name and arguments are mapped onto the
    parent's ``fname`` and ``fargs`` parameters.
    """

    def __init__(
        self, kname, vec_type_hint=None, kernel_args=None, known_args=None, **kargs
    ):
        """Initialise the underlying function generator.

        Parameters
        ----------
        kname:
            Kernel name, forwarded to the parent as ``fname``.
        vec_type_hint:
            Accepted for interface compatibility; this initialiser does not
            use it and does not forward it.
        kernel_args:
            Kernel arguments, forwarded to the parent as ``fargs``.
        known_args:
            Forwarded unchanged to the parent.
        **kargs:
            Any remaining keyword arguments, forwarded unchanged.
        """
        # Settings imposed by this class; caller supplied extras follow them.
        kernel_function_options = {
            "fname": kname,
            "output": "void",
            "inline": False,
            "fargs": kernel_args,
            "known_args": known_args,
        }
        super().__init__(**kernel_function_options, **kargs)
# Generator for kernel source code, built on KernelBase and OpenClCodeGenerator.
class KernelCodeGenerator(KernelBase, OpenClCodeGenerator):
    """Code generator for a single kernel function.

    Construction validates the inputs, records ``work_dim`` as a known
    variable, initialises both parent generators, injects the kernel
    arguments as variables and finally generates the kernel variables and
    the kernel attributes.
    """

    def __init__(
        self,
        name,
        typegen,
        work_dim,
        symbolic_mode=True,
        kernel_args=None,
        known_vars=None,
        vec_type_hint=None,
        **kwds,
    ):
        """Validate the inputs and set up the kernel generator.

        Parameters
        ----------
        name:
            Passed to the parent initialisers as both ``name`` and ``kname``.
        typegen:
            Type generator; must be an ``OpenClTypeGen`` instance.
        work_dim:
            Work dimension; must satisfy ``0 < work_dim <= 3``.
        symbolic_mode:
            Forwarded to the parent initialisers, stored on the instance and
            used when some of the kernel variables are created.
        kernel_args:
            ``ArgDict`` of kernel arguments; a new empty one is created when
            ``None`` is given.
        known_vars:
            Known variables; a new ``WriteOnceDict`` is created when ``None``
            is given. ``"work_dim"`` is added to it when not already present.
        vec_type_hint:
            Optional type name. It has to belong to ``typegen.builtin_types``
            and is reset to ``None`` when ``typegen.components`` reports a
            single component for it.
        **kwds:
            Forwarded unchanged to the parent initialisers.

        Raises
        ------
        ValueError
            If ``vec_type_hint`` is given but is not one of
            ``typegen.builtin_types``.
        """
        if kernel_args is None:
            kernel_args = ArgDict()
        if known_vars is None:
            known_vars = WriteOnceDict()

        check_instance(typegen, OpenClTypeGen)
        check_instance(kernel_args, ArgDict)
        assert 0 < work_dim <= 3, work_dim

        if vec_type_hint is not None:
            if vec_type_hint not in typegen.builtin_types:
                raise ValueError(f"Invalid vec_type_hint '{vec_type_hint}'.")
            # A hint with a single component is discarded.
            if typegen.components(vec_type_hint) == 1:
                vec_type_hint = None

        self.vec_type_hint = vec_type_hint
        self.work_dim = work_dim

        if "work_dim" not in known_vars.keys():
            known_vars["work_dim"] = work_dim

        # Kernel arguments that also appear among the known variables.
        known_args = {
            argname: known_vars[argname]
            for argname in kernel_args.keys()
            if argname in known_vars.keys()
        }

        super().__init__(
            name=name,
            kname=name,
            typegen=typegen,
            symbolic_mode=symbolic_mode,
            kernel_args=kernel_args,
            known_args=known_args,
            known_vars=known_vars,
            **kwds,
        )

        self.inject_vars(kernel_args)
        self.symbolic_mode = symbolic_mode

        self.gen_kernel_variables()
        self.gen_kernel_attributes()

    def get_global_work_size(self, work_size, local_work_size):
        """Return the global work size for an effective work size.

        ``work_size`` is rounded up, elementwise, to a multiple of
        ``local_work_size``.

        /!\\ It should be guaranteed that global_work_size is a multiple of
        local_work_size.
        """
        effective = np.asarray(work_size)
        local = np.asarray(local_work_size)
        group_count = (effective + local - 1) // local
        return group_count * local

    def min_ghosts(self):
        """Return an array holding ``work_dim`` zeros."""
        return np.asarray((0,) * self.work_dim)

    def required_workgroup_cache_size(self, local_work_size):
        """Return the required (static, dynamic) cache bytes per workgroup.

        This implementation always returns ``(0, 0)`` and does not look at
        ``local_work_size``.
        """
        static_bytes, dynamic_bytes = 0, 0
        return (static_bytes, dynamic_bytes)

    def gen_kernel_variables(self):
        """Create the standard kernel variables and register them.

        The variables are collected in a ``VarDict`` and handed over to
        ``self.update_vars``. ``symbolic_mode`` is only passed to the entries
        marked as symbolic below; the other ones rely on the default of their
        constructor.
        """
        typegen = self.typegen
        ndim = self.work_dim
        symbolic = {"symbolic_mode": self.symbolic_mode}
        plain = {}

        kernel_vars = VarDict()
        kernel_vars["work_dim"] = CodegenVariable(
            "work_dim", "uint", typegen, **symbolic
        )
        kernel_vars["global_index"] = CodegenVariable("GID", "int", typegen)
        kernel_vars["local_index"] = CodegenVariable("LID", "int", typegen)

        # (dictionary key and first constructor argument, second argument, options)
        builtin_specs = (
            ("global_size", "G", symbolic),
            ("local_size", "L", symbolic),
            ("global_id", "gid", plain),
            ("local_id", "lid", plain),
            ("num_groups", "ngroups", symbolic),
            ("group_id", "group_id", plain),
        )
        for fname, vname, options in builtin_specs:
            kernel_vars[fname] = CodegenVectorClBuiltinFunc(
                fname, vname, "int", ndim, typegen, **options
            )

        self.update_vars(kernel_vars)

    def gen_kernel_attributes(self):
        """Build ``self.kernel_attributes``.

        When the ``"local_size"`` variable has a value, a
        ``reqd_work_group_size`` attribute is generated from it, padded with
        ones up to three entries. Otherwise the dictionary stays empty.

        NOTE: ``self.vec_type_hint`` is stored on the instance but no
        ``vec_type_hint`` attribute is generated here.
        """
        attributes = {}
        local_size = self.vars["local_size"].value
        if local_size is not None:
            padding = (1,) * (3 - len(local_size))
            padded = tuple(local_size) + padding
            attributes["reqd_work_group_size"] = (
                "reqd_work_group_size({},{},{})".format(*padded[:3])
            )
        self.kernel_attributes = attributes

    def check_workitem_bounds(self, varname, compact=True):
        """Append an early ``return;`` guarded by a global id bounds check.

        The generated condition is true as soon as one of the first
        ``work_dim`` components of the ``"global_id"`` variable is greater
        than or equal to the matching component of the bound.

        Parameters
        ----------
        varname:
            Either the name of a variable registered in ``self.vars`` or a
            ``CodegenVariable`` used directly as the bound.
        compact:
            Forwarded to ``self._if_``.

        Raises
        ------
        TypeError
            If ``varname`` is neither a ``str`` nor a ``CodegenVariable``.
        """
        global_id = self.vars["global_id"]

        if isinstance(varname, CodegenVariable):
            bound = varname
        elif isinstance(varname, str):
            bound = self.vars[varname]
        else:
            raise TypeError("varname")

        out_of_bounds = " || ".join(
            f"({global_id[axis]}>={bound[axis]})" for axis in range(self.work_dim)
        )
        with self._if_(out_of_bounds, compact=compact):
            self.append("return;")

    @contextmanager
    def _kernel_(self):
        """Context manager wrapping the parent's ``_kernel_`` context.

        The third group of arguments returned by ``self.args.build_args()``
        is first declared inside the ``"global_scope_constants"`` code block,
        with ``ptr`` cleared and ``"__constant"`` storage. The parent context
        is then entered with this kernel's name, arguments and attributes,
        and the value it produces is yielded.
        """
        kernel_name = self.fname
        fargs, fargs_impl, constant_args = self.args.build_args()

        with self._codeblock_("global_scope_constants"):
            for constant_arg in constant_args:
                constant_arg.ptr = False
                constant_arg.storage = "__constant"
                constant_arg.declare(self)

        parent_context = super()._kernel_(
            name=kernel_name,
            args=fargs,
            args_impl=fargs_impl,
            attributes=self.kernel_attributes,
        )
        with parent_context as kernel:
            yield kernel